@@ -82,6 +82,13 @@ AWS_SANDBOX=false |
||
82 | 82 |
# Various Settings # |
83 | 83 |
######################## |
84 | 84 |
|
85 |
+# Specify the HTTP backend library for Faraday, used in WebsiteAgent. |
|
86 |
+# You can change this depending on the performance and stability you |
|
87 |
+# need for your service. Any choice other than "typhoeus", |
|
88 |
+# "net_http", or "em_http" requires you to bundle a corresponding |
|
89 |
+# gem via Gemfile. |
|
90 |
+FARADAY_HTTP_BACKEND=typhoeus |
|
91 |
+ |
|
85 | 92 |
# Allow JSONPath eval expressions, e.g., $..price[?(@ < 20)] |
86 | 93 |
# You should not allow this on a shared Huginn box because it is not secure. |
87 | 94 |
ALLOW_JSONPATH_EVAL=false |
@@ -33,6 +33,8 @@ gem 'geokit', '~> 1.6.7' |
||
33 | 33 |
gem 'geokit-rails3', '~> 0.1.5' |
34 | 34 |
|
35 | 35 |
gem 'kramdown', '~> 1.1.0' |
36 |
+gem 'faraday', '~> 0.9.0' |
|
37 |
+gem 'faraday_middleware' |
|
36 | 38 |
gem 'typhoeus', '~> 0.6.3' |
37 | 39 |
gem 'nokogiri', '~> 1.6.0' |
38 | 40 |
|
@@ -106,6 +106,8 @@ GEM |
||
106 | 106 |
execjs (2.0.2) |
107 | 107 |
faraday (0.9.0) |
108 | 108 |
multipart-post (>= 1.2, < 3) |
109 |
+ faraday_middleware (0.9.1) |
|
110 |
+ faraday (>= 0.7.4, < 0.10) |
|
109 | 111 |
ffi (1.9.3) |
110 | 112 |
forecast_io (2.0.0) |
111 | 113 |
faraday |
@@ -316,6 +318,8 @@ DEPENDENCIES |
||
316 | 318 |
devise (~> 3.0.0) |
317 | 319 |
dotenv-rails |
318 | 320 |
em-http-request (~> 1.1.2) |
321 |
+ faraday (~> 0.9.0) |
|
322 |
+ faraday_middleware |
|
319 | 323 |
forecast_io (~> 2.0.0) |
320 | 324 |
foreman (~> 0.63.0) |
321 | 325 |
geokit (~> 1.6.7) |
@@ -1,5 +1,6 @@ |
||
1 | 1 |
require 'nokogiri' |
2 |
-require 'typhoeus' |
|
2 |
+require 'faraday' |
|
3 |
+require 'faraday_middleware' |
|
3 | 4 |
require 'date' |
4 | 5 |
|
5 | 6 |
module Agents |
@@ -38,7 +39,7 @@ module Agents |
||
38 | 39 |
|
39 | 40 |
Note that for all of the formats, whatever you extract MUST have the same number of matches for each extractor. E.g., if you're extracting rows, all extractors must match all rows. For generating CSS selectors, something like [SelectorGadget](http://selectorgadget.com) may be helpful. |
40 | 41 |
|
41 |
- Can be configured to use HTTP basic auth by including the `basic_auth` parameter with `username:password`. |
|
42 |
+ Can be configured to use HTTP basic auth by including the `basic_auth` parameter with `"username:password"`, or `["username", "password"]`. |
|
42 | 43 |
|
43 | 44 |
Set `expected_update_period_in_days` to the maximum amount of time that you'd expect to pass between Events being created by this Agent. This is only used to set the "working" status. |
44 | 45 |
|
@@ -103,34 +104,25 @@ module Agents |
||
103 | 104 |
errors.add(:base, "force_encoding must be a string") |
104 | 105 |
end |
105 | 106 |
end |
107 |
+ |
|
108 |
+ begin |
|
109 |
+ basic_auth_credentials() |
|
110 |
+ rescue => e |
|
111 |
+ errors.add(:base, e.message) |
|
112 |
+ end |
|
106 | 113 |
end |
107 | 114 |
|
108 | 115 |
def check |
109 |
- log "Fetching #{options['url']}" |
|
110 | 116 |
check_url options['url'] |
111 | 117 |
end |
112 | 118 |
|
113 | 119 |
def check_url(in_url) |
114 |
- hydra = Typhoeus::Hydra.new |
|
115 |
- request_opts = { :followlocation => true } |
|
116 |
- request_opts[:userpwd] = options['basic_auth'] if options['basic_auth'].present? |
|
117 |
- |
|
118 |
- requests = [] |
|
120 |
+ return unless in_url.present? |
|
119 | 121 |
|
120 |
- if in_url.kind_of?(Array) |
|
121 |
- in_url.each do |url| |
|
122 |
- requests.push(Typhoeus::Request.new(url, request_opts)) |
|
123 |
- end |
|
124 |
- else |
|
125 |
- requests.push(Typhoeus::Request.new(in_url, request_opts)) |
|
126 |
- end |
|
127 |
- |
|
128 |
- requests.each do |request| |
|
129 |
- request.on_failure do |response| |
|
130 |
- error "Failed: #{response.inspect}" |
|
131 |
- end |
|
132 |
- |
|
133 |
- request.on_success do |response| |
|
122 |
+ Array(in_url).each do |url| |
|
123 |
+ log "Fetching #{url}" |
|
124 |
+ response = faraday.get(url) |
|
125 |
+ if response.success? |
|
134 | 126 |
body = response.body |
135 | 127 |
if (encoding = options['force_encoding']).present? |
136 | 128 |
body = body.encode(Encoding::UTF_8, encoding) |
@@ -183,14 +175,14 @@ module Agents |
||
183 | 175 |
error "Got an uneven number of matches for #{options['name']}: #{options['extract'].inspect}" |
184 | 176 |
return |
185 | 177 |
end |
186 |
- |
|
178 |
+ |
|
187 | 179 |
old_events = previous_payloads num_unique_lengths.first |
188 | 180 |
num_unique_lengths.first.times do |index| |
189 | 181 |
result = {} |
190 | 182 |
options['extract'].keys.each do |name| |
191 | 183 |
result[name] = output[name][index] |
192 | 184 |
if name.to_s == 'url' |
193 |
- result[name] = URI.join(request.base_url, result[name]).to_s if (result[name] =~ URI::DEFAULT_PARSER.regexp[:ABS_URI]).nil? |
|
185 |
+ result[name] = (response.env[:url] + result[name]).to_s |
|
194 | 186 |
end |
195 | 187 |
end |
196 | 188 |
|
@@ -200,10 +192,9 @@ module Agents |
||
200 | 192 |
end |
201 | 193 |
end |
202 | 194 |
end |
195 |
+ else |
|
196 |
+ error "Failed: #{response.inspect}" |
|
203 | 197 |
end |
204 |
- |
|
205 |
- hydra.queue request |
|
206 |
- hydra.run |
|
207 | 198 |
end |
208 | 199 |
end |
209 | 200 |
|
@@ -288,6 +279,36 @@ module Agents |
||
288 | 279 |
end |
289 | 280 |
end |
290 | 281 |
|
291 |
- end |
|
282 |
# Returns the Faraday connection used to fetch pages, building it on first
# use and memoizing it in @faraday.
#
# The connection follows redirects, URL-encodes request bodies, sends HTTP
# basic auth when `basic_auth_credentials` yields a pair, and uses the
# adapter named by `faraday_backend`.
def faraday
  @faraday ||= Faraday.new do |conn|
    conn.use FaradayMiddleware::FollowRedirects
    conn.request :url_encoded

    credentials = basic_auth_credentials
    conn.request :basic_auth, *credentials if credentials

    adapter_name = faraday_backend
    # The Typhoeus adapter file is loaded on demand, only when that backend
    # is the one selected.
    require 'typhoeus/adapters/faraday' if adapter_name == :typhoeus
    conn.adapter adapter_name
  end
end
|
297 |
+ |
|
298 |
# Name of the Faraday adapter to use, as a Symbol.
#
# Read from the FARADAY_HTTP_BACKEND environment variable, ignoring
# surrounding whitespace. Falls back to :typhoeus when the variable is unset
# or blank (for example a bare `FARADAY_HTTP_BACKEND=` line in .env), because
# an empty value would otherwise become the symbol :"", which is not an
# adapter name.
def faraday_backend
  name = ENV['FARADAY_HTTP_BACKEND'].to_s.strip
  name.empty? ? :typhoeus : name.to_sym
end
|
301 |
+ |
|
302 |
# Parses the `basic_auth` option into a [username, password] pair.
#
# Accepts either a "username:password" string (split on the first colon
# only, so the password may itself contain colons) or a two-element array,
# which is returned as-is.
#
# Returns nil when no credentials are configured: nil, an empty or
# whitespace-only string, or an empty array. Blank values are treated as
# "unset" rather than as an error.
#
# Raises RuntimeError for any other value (a string without a colon, an
# array of the wrong size, or an unsupported type).
def basic_auth_credentials
  value = options['basic_auth']

  case value
  when nil
    return nil
  when String
    return nil if value.strip.empty?
    return value.split(':', 2) if value.include?(':')
  when Array
    return nil if value.empty?
    return value if value.size == 2
  end

  raise "bad value for basic_auth: #{value.inspect}"
end
|
313 |
+ end |
|
293 | 314 |
end |
@@ -348,7 +348,9 @@ describe Agents::WebsiteAgent do |
||
348 | 348 |
|
349 | 349 |
describe "checking with http basic auth" do |
350 | 350 |
before do |
351 |
- stub_request(:any, /user:pass/).to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200) |
|
351 |
+ stub_request(:any, /example/). |
|
352 |
+ with(headers: { 'Authorization' => "Basic #{['user:pass'].pack('m').chomp}" }). |
|
353 |
+ to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200) |
|
352 | 354 |
@site = { |
353 | 355 |
'name' => "XKCD", |
354 | 356 |
'expected_update_period_in_days' => 2, |
@@ -374,4 +376,4 @@ describe Agents::WebsiteAgent do |
||
374 | 376 |
end |
375 | 377 |
end |
376 | 378 |
end |
377 |
-end |
|
379 |
+end |